/* 
    Purpose: Using the 1970 Census (1% sample), this file locates black and white men aged 
             30-50 who are fathers of a child younger than 18 in the same household. Other 
             variables necessary to create average predicted father income (in 4b) are also 
             cleaned.

    Note: Income was asked of all individuals 14+ in the 1970 Census, 
          so there's no need to use a sample line weight (or any weight). 

    Creates: Census1970_fathers_ages30to50.dta
*/

* Start from an empty session and work relative to the Census data folder
clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"

    * Raw 1970 1% extract (download from IPUMS USA)
    use ./input/Census1970_1pct_raw.dta, clear
        * Tabulate the person weight; the original author's check confirmed that everyone receives a weight of 1
        tabulate perwt

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** SET UP INCOME VARIABLES
***************************

* Personal income: floor negative amounts at zero and turn the 9999999 code into missing
    replace inctot = 0 if inctot < 0
    replace inctot = . if inctot == 9999999

* Family income, version 1 (the Census-provided variable), cleaned the same way
    replace ftotinc = 0 if ftotinc < 0
    replace ftotinc = . if ftotinc == 9999999

* Family income, version 2 (built by hand): total personal income of the members of each family unit
    bysort serial famunit: egen fam_income = total(inctot)

* Count the cases where version 1 is missing although version 2 is positive
    /* Note from the original author: these discrepancies appear to come
       mostly from individuals living in group quarters. */
    count if fam_income > 0 & ftotinc == .

* Harmonize: fall back on version 2 wherever version 1 is missing
    replace ftotinc = fam_income if fam_income > 0 & ftotinc == .

* Household income

    // Flag one member per family unit (the lowest pernum) so that each family's income is counted once
    bysort serial famunit (pernum): gen fam_head = (_n == 1)

    // The flagged member carries the family income; everyone else, and any family with missing income, contributes 0
    gen temp = cond(fam_head == 1 & ftotinc != ., ftotinc, 0)

    // Add up the "separate" families that share a serial number to get household income (i.e. income by serial number)
    bysort serial: egen hh_income = total(temp)
    drop fam_income fam_head temp

    // From here on, fam_income refers to the harmonized Census family income
    rename ftotinc fam_income

/*
   Convert income variables to 1950 dollars using the CPI.
   Source: https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator/consumer-price-index-1913-

   The two CPI levels are held in local macros instead of generated variables.
   A generated variable defaults to float storage, which rounds 38.8 and 24.1
   before the ratio is taken; the macros are expanded as literals and the ratio
   is evaluated in double precision. This also avoids carrying two constant
   columns through the rest of the file.
*/
    local cpi1970 38.8
    local cpi1950 24.1

    foreach var of varlist inctot fam_income hh_income {
        // Rescale 1970 dollars to 1950 dollars; missing values stay missing
        replace `var' = `var' * (`cpi1950' / `cpi1970')
    }
  
* Keep only respondents whose personal, family, and household incomes are all non-zero and non-missing
    drop if inctot == 0 | inctot == . | fam_income == 0 | fam_income == . | hh_income == 0 | hh_income == .

    // Park the income-cleaned person file in a tempfile; it is reloaded once the fathers have been identified
    tempfile fulldata
    save `fulldata'

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** IDENTIFY FATHERS
***************************

/*
    Children are read from the raw extract, NOT from the data currently in
    memory. The data in memory has already been restricted to people with
    non-zero, non-missing income, and income is only collected for persons
    aged 14+ (see the note at the top of this file). Selecting children from
    that restricted file would silently discard every child under 14 and
    every older child without income, so fathers would be identified only
    through income-earning teenagers and kidsperdad would be undercounted.
    The income restriction is still applied to the fathers themselves when
    the father list is merged back onto the income-cleaned file.
*/
    use serial poploc age using ./input/Census1970_1pct_raw.dta, clear

    keep if age<18 //Restrict to children younger than 18

    replace poploc=. if poploc==0 //poploc==0 is treated as "no father present"
    drop if poploc==. //Exclude children without a father in the house

* Count number of kids per father
    bysort serial poploc: gen kidsperdad = _N
    label var kidsperdad "Number of kids living in Census father's household"

    by serial poploc: keep if _n==1 //Keep all unique father ids. Some fathers will have multiple children in the Census.
    rename poploc pernum //The father's person number becomes the merge key
    drop age

    tempfile children
    save `children'

* Reload the income-cleaned person file and keep only the people who appear in the father list
    use `fulldata', clear
    merge 1:1 serial pernum using `children', keep(match) nogenerate

* Alternative weight: person weight scaled by the number of kids attached to the father
    gen wgt1970_hhsizeadj = kidsperdad * perwt
    label variable wgt1970_hhsizeadj "Alternative Census weight; adjusted for # kids in father's household"

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** CREATE OTHER NECESSARY VARIABLES
***************************
  
* Sample restriction: fathers aged 30 to 50 with race code 1 or 2 (the black and white sample described in the file header)
    keep if inrange(age,30,50) & inlist(race,1,2)

    // Inspect the state distribution (including missings), then use the shorter name for the region recode
    tabulate statefip, missing
    rename statefip fips
    
* Region of current residence, rebuilt from state FIPS codes (the extract's own region variable is discarded)
    drop region

    /*
       Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
       Midwest:   Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
       South:     Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia,
                  Alabama, Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas
       West:      Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington
                  (note: Census puts AK and HI in with the Pacific division, so they are coded West here)
    */
    gen region_merge = .
    replace region_merge = 1 if inlist(fips, 9, 23, 25, 33, 44, 50, 34, 36, 42)
    replace region_merge = 2 if inlist(fips, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)
    replace region_merge = 3 if inlist(fips, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)
    replace region_merge = 4 if inlist(fips, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)
    tabulate region_merge, missing

    // Attach readable labels and tabulate again to check them
    label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
    label values region_merge region_l
    tabulate region_merge, missing

    // South indicator: 1 for region 3, 0 otherwise (an unassigned region also yields 0)
    gen south_merge = (region_merge == 3)
  
* Education variable: five categories built from the detailed education code educd

    gen edu=.
    replace edu=1 if educd<=25 //<grade school (includes people with no schooling)
    replace edu=2 if educd==26 //8th grade
    replace edu=3 if inlist(educd,30,40,50,61) //<hs
    replace edu=4 if inlist(educd,60,62,63,64) //hs
    // The upper bound must test educd, the same variable as the lower bound.
    // Testing a different variable (educ) would let educd==999 (missing)
    // fall into the ">hs" category instead of leaving edu missing.
    replace edu=5 if educd>64 & educd<999 //>hs. "999" is missing
    tab educd edu, m //Cross-check the mapping, including any codes left unassigned

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
**** ASSIGN COARSENED OCCS
****************

* Order records by occupation code (1950 basis)
    sort occ1950

* How many distinct Census occupation codes appear in the 1970 data?
    by occ1950: gen nvals = (_n == 1)
    count if nvals == 1

* Occupation codes 200-290: shift the code by 1000 when classwkr==1 (self-employed, per the original author's note)
    replace occ1950 = occ1950 + 1000 if inrange(occ1950, 200, 290) & classwkr == 1

* Attach the coarsened ANES occupation from the crosswalk
    merge m:1 occ1950 using ../Crosswalks/Crosswalk_1950Census_toANES.dta
    assert _merge != 1 // stop if any Census record has no crosswalk row
    drop if _merge == 2 // discard crosswalk rows that match no Census record
    drop _merge

    // Only code 997 (occupation missing or unknown) is allowed to lack a coarsened code; those records are dropped
    assert occ1950 == 997 if occ1950ej == .
    drop if occ1950ej == .

    // List the raw occupation codes that were mapped to coarsened code 99
    tabulate occ1950 if occ1950ej == 99, missing nolabel
            
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
**** SAVE 
****************

* Final variable list
    rename occ1950ej fatheroccej

    keep age race edu fatheroccej region_merge south_merge inctot fam_income hh_income perwt wgt*

    // Marker identifying these rows as Census observations
    gen census = 1
    label variable census "Census obs"

    // Log version of each income measure, plus labels on both the level and the log
    foreach inc in inctot fam_income hh_income {
        gen log_father_`inc' = ln(`inc')
        label variable log_father_`inc' "Log `inc', Census"
        label variable `inc' "`inc', Census"
    }

    // Final names for the family and household income levels
    rename fam_income faminc
    rename hh_income HHinc

    compress
    save ./output/Census1970_fathers_ages30to50.dta, replace
                    
     
